{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "import os\n",
    "from pathlib import Path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Project paths, all derived from the notebook's working directory.\n",
    "# The project root is taken to be the parent of the directory the kernel was\n",
    "# started in, so `data/` is resolved next to that directory, not inside it.\n",
    "file_dir = Path.cwd()\n",
    "# When porting to a .py file, anchor on the file's folder instead:\n",
    "# file_dir = Path(__file__).resolve().parent\n",
    "base_dir = file_dir.parent\n",
    "data_dir = base_dir / 'data'\n",
    "raw_data_dir = data_dir / 'raw'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Folder whose files are loaded with np.load in the word-level cells below.\n",
    "word_level_dir = raw_data_dir.joinpath('word_level_29')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Folder whose .npy files are loaded in the syllable-level cells below.\n",
    "syllable_level = raw_data_dir.joinpath('syllable_level_npy_39')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# iterdir() is a one-shot generator that yields entries in arbitrary order.\n",
    "# A sorted list makes the sampled file reproducible and lets the loop cell be\n",
    "# re-run without silently moving on to the next file.\n",
    "fnames_s = sorted(syllable_level.iterdir())\n",
    "# Filled by the loop cell with the array loaded from the sampled file.\n",
    "out_data = None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/home/b8313/coding/music/melody-generation-from-lyrics/data/raw/syllable_level_npy_39/9667f18cffb97b4ac92d681091c0ab8b.npy\n",
      "[[0.0, 0.08771933333333326, 293.6647679174076, 81.0], [0.2631579999999998, 0.2850878333333329, 293.6647679174076, 91.0], [0.5098686250000002, 0.2850878333333329, 261.6255653005986, 70.0], [3.421053999999999, 0.09320179166666698, 329.6275569128699, 83.0], [0.25219308333333323, 0.30701766666666686, 329.6275569128699, 89.0], [0.8004389166666677, 0.822368749999999, 329.6275569128699, 76.0], [3.1524135416666663, 0.15350883333333343, 369.9944227116344, 94.0], [0.27960537499999916, 0.16447375000000086, 369.9944227116344, 82.0], [0.26315800000000067, 0.3728071666666679, 369.9944227116344, 96.0], [0.5263160000000013, 0.5153510833333321, 369.9944227116344, 104.0], [0.5427633749999998, 0.4111843749999977, 329.6275569128699, 71.0], [0.520833541666665, 0.4934212500000008, 329.6275569128699, 85.0], [0.5153510833333357, 0.4989037083333301, 369.9944227116344, 76.0], [0.5427633749999998, 0.5153510833333321, 329.6275569128699, 70.0], [0.5263159999999978, 0.6524125416666671, 293.6647679174076, 63.0], [4.972589708333334, 0.301535208333334, 293.6647679174076, 99.0], [0.537280916666667, 0.285087833333332, 261.6255653005986, 71.0], [3.1304837083333332, 0.11513162500000007, 329.6275569128699, 87.0], [0.285087833333332, 0.1206140833333329, 329.6275569128699, 82.0], [0.25767554166666784, 0.2905702916666648, 329.6275569128699, 87.0], [0.520833541666665, 0.23574570833333297, 293.6647679174076, 71.0], [0.25767554166666784, 0.7510967916666651, 329.6275569128699, 75.0], [2.9221502916666644, 0.16995620833333192, 369.9944227116344, 94.0], [0.2631580000000042, 0.15899129166666626, 369.9944227116344, 67.0], [0.24671062499999863, 0.17543866666666474, 369.9944227116344, 82.0], [0.2631579999999971, 0.4111843750000048, 369.9944227116344, 72.0], [0.5317984583333342, 0.4440791250000018, 369.9944227116344, 92.0], [0.5263160000000013, 0.32894749999999817, 329.6275569128699, 84.0], [0.5208335416666685, 0.5043861666666629, 329.6275569128699, 87.0], [0.5263159999999942, 0.34539487500000376, 
293.6647679174076, 85.0], [0.5317984583333342, 0.8497810416666667, 293.6647679174076, 83.0], [6.6282921250000015, 0.3947369999999992, 783.9908719634985, 92.0], [0.5153510833333357, 0.131578999999995, 587.3295358348151, 91.0], [0.2576755416666643, 0.4495615833333346, 587.3295358348151, 88.0], [0.4824563333333316, 0.25219308333333856, 523.2511306011972, 88.0], [0.2796053750000027, 0.3673247083333351, 493.8833012561241, 87.0], [1.3048250833333341, 0.10416670833333086, 293.6647679174076, 82.0], [0.2631579999999971, 0.1096491666666708, 293.6647679174076, 80.0], [0.2576755416666714, 0.21381587499999455, 293.6647679174076, 81.0], [0.5427633749999998, 0.5427633749999998, 329.6275569128699, 82.0], [0.7510967916666615, 0.202850958333336, 261.6255653005986, 81.0], [0.2631580000000042, 0.14254391666666777, 391.99543598174927, 76.0], [0.2741229166666628, 0.2631580000000042, 493.8833012561241, 87.0], [0.2741229166666699, 0.28508783333333554, 523.2511306011972, 92.0], [0.29057029166666837, 0.13706145833332783, 391.99543598174927, 74.0], [0.2247807916666602, 0.49342125000000436, 391.99543598174927, 83.0], [2.1052640000000054, 0.28508783333332843, 369.9944227116344, 75.0], [0.5427633749999998, 0.34539487499999666, 293.6647679174076, 76.0], [0.5317984583333342, 0.5263159999999942, 246.94165062806206, 81.0], [0.7675441666666671, 0.32894749999999817, 293.6647679174076, 77.0], [1.0526319999999956, 0.2631580000000042, 391.99543598174927, 84.0], [0.2796053750000027, 0.21929833333333448, 329.6275569128699, 71.0], [0.25219308333333146, 0.30701766666666686, 329.6275569128699, 80.0], [0.8333336666666682, 1.2828952499999957, 493.8833012561241, 93.0], [2.58772033333333, 0.1315790000000021, 293.6647679174076, 80.0], [0.26864045833333705, 0.26864045833332995, 293.6647679174076, 85.0], [0.26864045833332995, 0.23574570833333297, 261.6255653005986, 65.0], [0.24671062499999863, 0.2960527500000012, 293.6647679174076, 70.0], [0.30701766666666686, 0.2247807916666673, 329.6275569128699, 78.0], 
[0.5098686250000029, 0.6304827083333322, 293.6647679174076, 77.0], [2.6315799999999996, 0.21929833333333448, 329.6275569128699, 80.0], [0.26864045833333705, 0.17543866666666474, 329.6275569128699, 74.0], [0.2631579999999971, 0.1315790000000021, 329.6275569128699, 65.0], [0.25219308333333146, 0.2796053750000027, 329.6275569128699, 83.0], [0.773026625, 0.9100880833333349, 329.6275569128699, 77.0], [2.927632750000008, 0.19736849999999606, 369.9944227116344, 96.0], [0.26315799999999, 0.17543866666666474, 369.9944227116344, 72.0], [0.26864045833333705, 0.1480263750000006, 369.9944227116344, 54.0], [0.2631580000000042, 0.3892545416666593, 369.9944227116344, 86.0], [0.5098686249999957, 0.5317984583333271, 369.9944227116344, 94.0], [0.5482458333333398, 0.36184224999999515, 329.6275569128699, 80.0], [0.5208335416666614, 0.5208335416666614, 329.6275569128699, 88.0], [0.5263159999999942, 0.4824563333333316, 369.9944227116344, 75.0], [0.5427633750000069, 0.4989037083333301, 329.6275569128699, 84.0], [0.5098686249999957, 0.5975879583333352, 293.6647679174076, 77.0], [4.462721083333335, 0.1480263750000006, 293.6647679174076, 87.0], [0.2576755416666714, 0.13706145833333494, 293.6647679174076, 71.0], [0.25219308333332435, 0.2960527500000012, 293.6647679174076, 89.0], [0.8059213749999969, 0.773026625, 293.6647679174076, 83.0], [2.9002204583333366, 0.1315790000000021, 329.6275569128699, 92.0], [0.24671062500000573, 0.12609654166666928, 329.6275569128699, 83.0], [0.2631580000000042, 0.3125001249999997, 329.6275569128699, 92.0], [0.8004389166666641, 0.8333336666666611, 329.6275569128699, 81.0], [3.388159250000001, 0.23574570833332587, 369.9944227116344, 86.0], [0.3563597916666623, 0.23026324999999304, 369.9944227116344, 75.0], [0.4440791250000018, 0.24671062499999152, 369.9944227116344, 78.0], [0.301535208333334, 0.3179825833333325, 369.9944227116344, 98.0], [0.7894739999999985, 0.24671062499999152, 329.6275569128699, 95.0], [0.30701766666666686, 0.2576755416666714, 329.6275569128699, 
95.0], [0.26864045833333705, 0.38377208333332646, 293.6647679174076, 94.0], [0.4605265000000003, 0.8771933333333237, 293.6647679174076, 94.0], [1.2938601666666614, 0.32894749999999817, 493.8833012561241, 101.0], [0.7949564583333313, 1.0581144583333355, 659.2551138257398, 90.0], [1.206140833333336, 0.20833341666666172, 659.2551138257398, 90.0], [2.2258780833333276, 0.3508773333333437, 659.2551138257398, 92.0], [0.36184225000000936, 0.3508773333333295, 739.9888454232688, 97.0], [0.38377208333332646, 0.30701766666666686, 783.9908719634985, 92.0], [0.3508773333333295, 0.3508773333333437, 783.9908719634985, 92.0], [0.5153510833333428, 0.09868424999999093, 587.3295358348151, 91.0], [0.2576755416666572, 0.4495615833333346, 587.3295358348151, 88.0], [0.4824563333333316, 0.25219308333333856, 523.2511306011972, 88.0], [0.2796053750000027, 0.3673247083333422, 493.8833012561241, 87.0], [1.3048250833333412, 0.10416670833332375, 293.6647679174076, 82.0], [0.26315799999999, 0.1096491666666708, 293.6647679174076, 80.0], [0.2576755416666714, 0.21381587499999455, 293.6647679174076, 81.0], [1.5570181666666656, 0.2192983333333416, 391.99543598174927, 76.0], [0.5482458333333398, 0.28508783333333554, 523.2511306011972, 92.0], [0.29057029166666837, 0.17543866666666474, 391.99543598174927, 74.0], [2.3300447916666656, 0.4824563333333316, 369.9944227116344, 75.0], [0.5427633749999927, 0.4824563333333316, 293.6647679174076, 76.0], [0.5317984583333413, 0.5263159999999942, 246.94165062806206, 81.0], [0.7675441666666671, 0.32894749999999817, 293.6647679174076, 77.0], [28.845212489583346, 2.481617812499991, 329.6275569128699, 81.0], [3.961397322916639, 1.3235295000000065, 493.8833012561241, 89.0], [1.4062500937500033, 0.1919117708333431, 523.2511306011972, 88.0], [0.1919117708333431, 0.1632872812500068, 493.8833012561241, 80.0], [-0.2011029479166666, 1.9489150208333683, 369.9944227116344, 86.0], [0.3465330729166851, 1.6182717916666718, 440.0, 81.0], [2.4922131979166693, 0.07944921874999977, 
293.6647679174076, 88.0], [0.25953411458334585, 0.30720364583334003, 293.6647679174076, 92.0], [0.2648307291666754, 0.2542374999999879, 329.6275569128699, 77.0], [0.24364427083332885, 0.29661041666665255, 261.6255653005986, 77.0], [0.2648307291666754, 0.6091106770833221, 293.6647679174076, 89.0], [3.0455533854166674, 0.0847458333333293, 329.6275569128699, 76.0], [0.24894088541665838, 0.28072057291666397, 329.6275569128699, 88.0], [0.27542395833333444, 0.45021223958332257, 369.9944227116344, 82.0], [0.4872885416666577, 0.6250005208333391, 329.6275569128699, 84.0], [2.807205729166668, 0.20656796874999372, 369.9944227116344, 74.0], [0.27542395833333444, 0.3707630208333228, 369.9944227116344, 76.0], [0.4713986979166691, 0.3495765625000047, 369.9944227116344, 94.0], [0.4872885416666577, 0.3019070312500105, 369.9944227116344, 92.0], [0.28072057291666397, 0.2542375000000163, 329.6275569128699, 80.0], [0.31250026041666956, 0.32839010416665815, 329.6275569128699, 78.0], [0.4555088541666805, 0.42902578125000446, 329.6275569128699, 92.0], [0.5190682291666633, 0.3866528645833398, 369.9944227116344, 92.0], [0.47669531249999864, 0.46080546875001005, 329.6275569128699, 88.0], [0.5243648437499928, 0.6514835937500152, 293.6647679174076, 92.0], [4.841105729166685, 0.2701273437499765, 293.6647679174076, 101.0], [0.2542374999999879, 0.24364427083332885, 329.6275569128699, 84.0], [0.23834765624999932, 0.3019070312500105, 261.6255653005986, 84.0], [3.061443229166656, 0.1483052083333405, 329.6275569128699, 85.0], [0.2542375000000163, 0.12182213541666442, 329.6275569128699, 83.0], [0.2542374999999879, 0.296610416666681, 329.6275569128699, 89.0], [0.5137716145833338, 0.2648307291666754, 293.6647679174076, 83.0], [3.024366927083321, 0.14300859375001096, 369.9944227116344, 82.0], [0.2701273437500049, 0.14300859375001096, 369.9944227116344, 90.0], [0.2701273437500049, 0.15360182291667002, 369.9944227116344, 84.0], [0.24364427083332885, 0.4025427083333284, 369.9944227116344, 79.0], 
[0.5243648437500212, 0.4237291666666465, 369.9944227116344, 98.0], [0.5084750000000042, 0.31250026041664114, 329.6275569128699, 91.0], [0.48199192708332816, 0.4078393229166579, 329.6275569128699, 91.0], [0.5243648437499928, 0.4025427083333284, 369.9944227116344, 94.0], [0.5137716145833338, 0.41313593749998745, 329.6275569128699, 91.0], [0.4925851562499872, 0.7786023437500091, 293.6647679174076, 92.0], [4.8358091145833555, 0.3072036458333116, 293.6647679174076, 99.0], [0.49788177083331675, 0.27542395833333444, 261.6255653005986, 87.0], [3.3050875000000133, 0.12711874999999395, 329.6275569128699, 75.0], [0.275423958333306, 0.296610416666681, 329.6275569128699, 99.0], [0.24364427083335727, 0.47669531249999864, 369.9944227116344, 78.0], [0.49788177083331675, 0.6514835937500152, 329.6275569128699, 84.0], [2.817798958333327, 0.16949166666668702, 369.9944227116344, 104.0], [0.2489408854166868, 0.44491562499999304, 369.9944227116344, 94.0], [0.5137716145833338, 0.3548731770833342, 369.9944227116344, 86.0], [0.48199192708332816, 0.29661041666665255, 369.9944227116344, 96.0], [0.5667377604166575, 0.36016979166669216, 329.6275569128699, 78.0], [0.4713986979166691, 0.41313593750001587, 329.6275569128699, 92.0], [0.48199192708332816, 0.4713986979166691, 369.9944227116344, 92.0], [0.5402546875000098, 0.4872885416666577, 329.6275569128699, 92.0], [0.5031783854166747, 0.5985174479166631, 293.6647679174076, 83.0], [4.8199192708333385, 0.3072036458333116, 293.6647679174076, 103.0], [0.5031783854166463, 0.31250026041666956, 261.6255653005986, 84.0], [3.3156807291666723, 0.09004244791665883, 329.6275569128699, 97.0], [0.2542375000000163, 0.24894088541665838, 329.6275569128699, 107.0], [0.26483072916664696, 0.42372916666667493, 369.9944227116344, 90.0], [0.49258515625001564, 0.6514835937499868, 329.6275569128699, 94.0], [2.817798958333327, 0.15360182291667002, 369.9944227116344, 92.0], [0.23834765624999932, 0.39194947916666933, 369.9944227116344, 82.0], [0.4872885416666577, 
0.3866528645833398, 369.9944227116344, 94.0], [0.5402546875000098, 0.2701273437500049, 369.9944227116344, 107.0], [0.5402546875000098, 0.34427994791664673, 329.6275569128699, 91.0], [0.4713986979166407, 0.42902578125000446, 329.6275569128699, 92.0], [0.5031783854166747, 0.40783932291668634, 369.9944227116344, 94.0], [0.5137716145833338, 0.4661020833333396, 329.6275569128699, 97.0], [0.5084750000000042, 0.7944921874999977, 293.6647679174076, 92.0]]\n",
      "[[62.0, 0.25, 0.0], [62.0, 1.0, 0.5], [60.0, 1.0, 1.0], [64.0, 0.75, 16.0], [64.0, 1.0, 0.0], [64.0, 3.0, 2.0], [66.0, 1.0, 8.0], [66.0, 1.0, 0.0], [66.0, 1.5, 0.0], [66.0, 2.0, 0.0], [64.0, 2.5, 0.0], [64.0, 2.0, 0.0], [66.0, 2.0, 0.0], [64.0, 2.0, 0.0], [62.0, 3.0, 0.0], [62.0, 1.0, 16.0], [60.0, 1.0, 1.0], [64.0, 1.0, 8.0], [64.0, 1.0, 0.0], [64.0, 1.0, 0.0], [62.0, 1.0, 1.0], [64.0, 3.0, 0.0], [66.0, 1.0, 8.0], [66.0, 1.0, 0.0], [66.0, 1.0, 0.0], [66.0, 2.5, 0.0], [66.0, 2.5, 0.0], [64.0, 1.0, 0.0], [64.0, 2.0, 1.0], [62.0, 1.5, 0.0], [62.0, 3.0, 0.0], [79.0, 2.5, 16.0], [74.0, 1.0, 0.0], [74.0, 2.0, 0.0], [72.0, 1.0, 0.0], [71.0, 1.0, 0.0], [62.0, 1.0, 4.0], [62.0, 1.0, 0.0], [62.0, 1.0, 0.0], [64.0, 2.0, 1.0], [60.0, 1.0, 1.0], [67.0, 1.0, 0.0], [71.0, 1.0, 0.0], [72.0, 1.0, 0.0], [67.0, 1.0, 0.0], [67.0, 2.0, 0.0], [66.0, 1.0, 8.0], [62.0, 1.5, 1.0], [59.0, 2.0, 0.0], [62.0, 1.0, 1.0], [67.0, 1.0, 2.0], [64.0, 1.0, 0.0], [64.0, 1.0, 0.0], [71.0, 6.0, 2.0], [62.0, 1.0, 4.0], [62.0, 1.0, 0.0], [60.0, 1.0, 0.0], [62.0, 1.0, 0.0], [64.0, 1.0, 0.0], [62.0, 2.0, 1.0], [64.0, 1.0, 8.0], [64.0, 1.0, 0.0], [64.0, 1.0, 0.0], [64.0, 1.0, 0.0], [64.0, 4.0, 2.0], [66.0, 1.5, 8.0], [66.0, 1.0, 0.0], [66.0, 1.0, 0.0], [66.0, 2.5, 0.0], [66.0, 2.0, 0.0], [64.0, 1.5, 0.0], [64.0, 2.0, 0.0], [66.0, 2.0, 0.0], [64.0, 2.0, 0.0], [62.0, 2.0, 0.0], [62.0, 1.0, 16.0], [62.0, 1.0, 0.0], [62.0, 1.0, 0.0], [62.0, 3.0, 2.0], [64.0, 1.0, 8.0], [64.0, 1.0, 0.0], [64.0, 1.0, 0.0], [64.0, 3.0, 2.0], [66.0, 1.5, 8.0], [66.0, 1.0, 0.0], [66.0, 1.0, 1.0], [66.0, 1.0, 0.0], [64.0, 1.0, 2.0], [64.0, 1.0, 0.0], [62.0, 2.5, 0.0], [62.0, 3.0, 0.0], [71.0, 1.0, 2.0], [76.0, 4.5, 2.0], [76.0, 1.0, 0.0], [76.0, 1.0, 8.0], [78.0, 1.0, 0.0], [79.0, 1.0, 0.0], [79.0, 1.5, 0.0], [74.0, 1.0, 0.0], [74.0, 2.0, 0.0], [72.0, 1.0, 0.0], [71.0, 1.0, 0.0], [62.0, 1.0, 4.0], [62.0, 1.0, 0.0], [62.0, 1.0, 0.0], [67.0, 1.0, 4.0], [72.0, 1.0, 1.0], [67.0, 0.5, 0.0], [66.0, 2.0, 8.0], [62.0, 2.0, 0.0], 
[59.0, 2.0, 0.0], [62.0, 1.0, 1.0], [64.0, 8.0, 32.0], [71.0, 6.5, 4.0], [72.0, 1.0, 0.0], [71.0, 0.5, 0.0], [66.0, 8.0, 0.0], [69.0, 6.0, 0.0], [62.0, 0.75, 4.0], [62.0, 1.0, 0.0], [64.0, 1.0, 0.0], [60.0, 1.0, 0.0], [62.0, 2.0, 0.0], [64.0, 0.75, 8.0], [64.0, 1.0, 0.0], [66.0, 2.0, 0.0], [64.0, 2.0, 0.0], [66.0, 1.5, 8.0], [66.0, 1.5, 0.0], [66.0, 1.5, 0.0], [66.0, 1.0, 0.0], [64.0, 1.0, 0.0], [64.0, 1.5, 0.0], [64.0, 2.5, 0.0], [66.0, 2.5, 0.0], [64.0, 2.0, 0.0], [62.0, 3.0, 0.0], [62.0, 1.0, 16.0], [64.0, 1.0, 0.0], [60.0, 1.0, 0.0], [64.0, 1.0, 8.0], [64.0, 1.0, 0.0], [64.0, 1.0, 0.0], [62.0, 1.0, 1.0], [66.0, 1.0, 8.0], [66.0, 1.0, 0.0], [66.0, 1.0, 0.0], [66.0, 2.5, 0.0], [66.0, 2.5, 0.0], [64.0, 1.5, 0.0], [64.0, 2.5, 0.0], [66.0, 2.5, 0.0], [64.0, 2.5, 0.0], [62.0, 3.0, 0.0], [62.0, 1.5, 16.0], [60.0, 1.0, 0.0], [64.0, 1.0, 8.0], [64.0, 1.0, 0.0], [66.0, 2.0, 0.0], [64.0, 3.0, 0.0], [66.0, 1.0, 8.0], [66.0, 2.5, 0.0], [66.0, 1.5, 0.0], [66.0, 1.0, 0.0], [64.0, 1.5, 1.0], [64.0, 2.5, 0.0], [66.0, 2.5, 0.0], [64.0, 2.0, 0.0], [62.0, 2.0, 0.0], [62.0, 1.0, 16.0], [60.0, 1.0, 1.0], [64.0, 0.75, 8.0], [64.0, 1.0, 0.0], [66.0, 2.5, 0.0], [64.0, 3.0, 0.0], [66.0, 1.0, 8.0], [66.0, 2.5, 0.0], [66.0, 2.5, 0.0], [66.0, 1.0, 0.0], [64.0, 1.5, 1.0], [64.0, 2.5, 0.0], [66.0, 2.5, 0.0], [64.0, 2.0, 0.0], [62.0, 3.0, 0.0]]\n",
      "[\"We're\", 'caught', 'in', 'trap', 'I', 'walk', 'be', 'cause', 'I', 'love', 'you', 'too', 'much', 'ba', 'by', 'Why', \"can't\", 'see', 'what', \"you're\", 'do', \"in'\", 'me', 'when', 'you', \"don't\", 'be', 'lieve', 'a', 'word', 'I', 'We', \"can't\", 'go', 'on', 'to', 'geth', 'er', 'with', 'sus', 'pi', 'cious', 'minds', 'And', 'we', \"can't\", 'build', 'our', 'dreams', 'on', 'sus', 'pi', 'cious', 'minds', 'So', 'if', 'an', 'old', 'friend', 'I', 'know', 'stops', 'by', 'to', 'say', 'lo', 'would', 'I', 'still', 'see', 'sus', 'pi', 'cion', 'in', 'your', 'Here', 'we', 'go', 'a', 'gain', 'ask', \"in'\", 'where', 'been', 'You', \"can't\", 'see', 'the', 'tears', 'are', 'real', 'cry', \"in'\", 'We', \"can't\", 'go', 'on', 'to', 'geth', 'er', 'with', 'sus', 'pi', 'cious', 'minds', 'And', 'we', \"can't\", 'build', 'our', 'dreams', 'on', 'pi', 'Oh', 'sur', 'dry', 'tears', 'your', \"Let's\", 'let', 'good', 'die', 'hon', 'you', 'I', 'nev', 'lied', 'you', 'mm', 'yeah!', 'caught', 'a', 'I', 'walk', 'be', 'I', 'love', 'too', 'ba', 'Why', 'you', 'see', 'what', \"you're\", \"in'\", 'me', 'when', 'you', \"don't\", 'be', 'lieve', 'a', 'word', 'I', 'But', \"don't\", 'know', 'caught', 'a', 'I', 'walk', 'out', 'be', 'cause', 'I', 'love', 'you', 'too', 'much', 'by', 'But', 'you', \"I'm\", 'in', 'trap', 'I', \"can't\", 'out', 'be', 'cause', 'I', 'love', 'you', 'too']\n"
     ]
    }
   ],
   "source": [
    "for i in fnames_s:\n",
    "    print(i)\n",
    "    data = np.load(i, allow_pickle=True)\n",
    "    # data = data[0][2]\n",
    "    # print(data)\n",
    "#     print(data[0][0][0])\n",
    "#     print(len(data[0]))\n",
    "#     print(data[1][0][0])\n",
    "#     print(len(data[0]))\n",
    "#     print(data[2][0][0])\n",
    "#     print(len(data[0]))\n",
    "    print(data[0][0])\n",
    "    print(data[0][1])\n",
    "    print(data[0][2])\n",
    "    out_data=data\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "outputs": [],
   "source": [
    "NGRAM_SIZE = 20  # number of consecutive entries per sliding window\n",
    "\n",
    "# Third field of the first record of the sampled file.\n",
    "syllables = out_data[0][2]\n",
    "# Zipping the sequence against its own shifted copies yields every run of\n",
    "# NGRAM_SIZE consecutive entries. zip stops at the shortest copy, so a\n",
    "# sequence shorter than NGRAM_SIZE produces an empty list.\n",
    "ngrams = zip(*(syllables[offset:] for offset in range(NGRAM_SIZE)))\n",
    "some_data = [list(ngram) for ngram in ngrams]"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reuse the sample kept by the loop cell rather than reloading through the\n",
    "# leaked loop variable `i`, which any later loop can rebind to another file.\n",
    "data = out_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "numpy.ndarray"
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "[\"We're\",\n 'caught',\n 'in',\n 'trap',\n 'I',\n 'walk',\n 'be',\n 'cause',\n 'I',\n 'love',\n 'you',\n 'too',\n 'much',\n 'ba',\n 'by',\n 'Why',\n \"can't\",\n 'see',\n 'what',\n \"you're\",\n 'do',\n \"in'\",\n 'me',\n 'when',\n 'you',\n \"don't\",\n 'be',\n 'lieve',\n 'a',\n 'word',\n 'I',\n 'We',\n \"can't\",\n 'go',\n 'on',\n 'to',\n 'geth',\n 'er',\n 'with',\n 'sus',\n 'pi',\n 'cious',\n 'minds',\n 'And',\n 'we',\n \"can't\",\n 'build',\n 'our',\n 'dreams',\n 'on',\n 'sus',\n 'pi',\n 'cious',\n 'minds',\n 'So',\n 'if',\n 'an',\n 'old',\n 'friend',\n 'I',\n 'know',\n 'stops',\n 'by',\n 'to',\n 'say',\n 'lo',\n 'would',\n 'I',\n 'still',\n 'see',\n 'sus',\n 'pi',\n 'cion',\n 'in',\n 'your',\n 'Here',\n 'we',\n 'go',\n 'a',\n 'gain',\n 'ask',\n \"in'\",\n 'where',\n 'been',\n 'You',\n \"can't\",\n 'see',\n 'the',\n 'tears',\n 'are',\n 'real',\n 'cry',\n \"in'\",\n 'We',\n \"can't\",\n 'go',\n 'on',\n 'to',\n 'geth',\n 'er',\n 'with',\n 'sus',\n 'pi',\n 'cious',\n 'minds',\n 'And',\n 'we',\n \"can't\",\n 'build',\n 'our',\n 'dreams',\n 'on',\n 'pi',\n 'Oh',\n 'sur',\n 'dry',\n 'tears',\n 'your',\n \"Let's\",\n 'let',\n 'good',\n 'die',\n 'hon',\n 'you',\n 'I',\n 'nev',\n 'lied',\n 'you',\n 'mm',\n 'yeah!',\n 'caught',\n 'a',\n 'I',\n 'walk',\n 'be',\n 'I',\n 'love',\n 'too',\n 'ba',\n 'Why',\n 'you',\n 'see',\n 'what',\n \"you're\",\n \"in'\",\n 'me',\n 'when',\n 'you',\n \"don't\",\n 'be',\n 'lieve',\n 'a',\n 'word',\n 'I',\n 'But',\n \"don't\",\n 'know',\n 'caught',\n 'a',\n 'I',\n 'walk',\n 'out',\n 'be',\n 'cause',\n 'I',\n 'love',\n 'you',\n 'too',\n 'much',\n 'by',\n 'But',\n 'you',\n \"I'm\",\n 'in',\n 'trap',\n 'I',\n \"can't\",\n 'out',\n 'be',\n 'cause',\n 'I',\n 'love',\n 'you',\n 'too']"
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[0][2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "'W'"
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[0][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "'c'"
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[1][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Remixed'"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[2][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "'in'"
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# iterdir() is a one-shot generator that yields entries in arbitrary order.\n",
    "# Sorting pins which file the loop below samples and keeps that cell\n",
    "# re-runnable.\n",
    "fnames = sorted(word_level_dir.iterdir())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[[62.0, 1.0, 0.5]], [[60.0, 1.0, 1.0]], [[64.0, 0.75, 16.0]], [[64.0, 1.0, 0.0]], [[64.0, 3.0, 2.0]], [[66.0, 1.0, 8.0], [66.0, 1.0, 0.0]], [[66.0, 1.5, 0.0]], [[66.0, 2.0, 0.0]], [[64.0, 2.5, 0.0]], [[64.0, 2.0, 0.0]], [[66.0, 2.0, 0.0]], [[64.0, 2.0, 0.0], [62.0, 3.0, 0.0]], [[62.0, 1.0, 16.0]], [[64.0, 1.0, 8.0]], [[64.0, 1.0, 0.0]], [[66.0, 1.0, 8.0]], [[66.0, 1.0, 0.0]], [[66.0, 1.0, 0.0]], [[66.0, 2.5, 0.0], [64.0, 1.0, 0.0]], [[64.0, 2.0, 1.0]], [[62.0, 1.5, 0.0]], [[62.0, 3.0, 0.0]], [[79.0, 2.5, 16.0]], [[74.0, 2.0, 0.0]], [[72.0, 1.0, 0.0]], [[71.0, 1.0, 0.0], [62.0, 1.0, 4.0], [62.0, 1.0, 0.0]], [[62.0, 1.0, 0.0]], [[64.0, 2.0, 1.0], [60.0, 1.0, 1.0], [67.0, 1.0, 0.0]], [[71.0, 1.0, 0.0]], [[72.0, 1.0, 0.0]], [[67.0, 1.0, 0.0]], [[66.0, 1.0, 8.0]], [[62.0, 1.5, 1.0]], [[59.0, 2.0, 0.0]], [[62.0, 1.0, 1.0]], [[67.0, 1.0, 2.0], [64.0, 1.0, 0.0], [64.0, 1.0, 0.0]], [[71.0, 6.0, 2.0]], [[62.0, 1.0, 4.0]], [[62.0, 1.0, 0.0]], [[60.0, 1.0, 0.0]], [[62.0, 1.0, 0.0]], [[64.0, 1.0, 0.0]], [[62.0, 2.0, 1.0]], [[64.0, 1.0, 8.0]], [[64.0, 1.0, 0.0]], [[64.0, 1.0, 0.0]], [[64.0, 1.0, 0.0]], [[64.0, 4.0, 2.0]], [[66.0, 1.0, 0.0]], [[66.0, 1.0, 0.0]], [[66.0, 2.5, 0.0]], [[66.0, 2.0, 0.0]], [[64.0, 1.5, 0.0], [64.0, 2.0, 0.0], [66.0, 2.0, 0.0]], [[64.0, 2.0, 0.0]], [[62.0, 2.0, 0.0]], [[62.0, 1.0, 16.0]], [[62.0, 1.0, 0.0]], [[62.0, 1.0, 0.0]], [[62.0, 3.0, 2.0], [64.0, 1.0, 8.0]], [[66.0, 1.5, 8.0]], [[66.0, 1.0, 0.0]], [[66.0, 1.0, 0.0]], [[64.0, 1.0, 2.0]], [[64.0, 1.0, 0.0]], [[62.0, 2.5, 0.0]], [[62.0, 3.0, 0.0]], [[76.0, 1.0, 0.0]], [[78.0, 1.0, 0.0]], [[79.0, 1.0, 0.0]], [[79.0, 1.5, 0.0], [74.0, 1.0, 0.0], [74.0, 2.0, 0.0]], [[72.0, 1.0, 0.0]], [[71.0, 1.0, 0.0], [62.0, 1.0, 4.0], [62.0, 1.0, 0.0]], [[62.0, 1.0, 0.0]], [[67.0, 1.0, 4.0]], [[72.0, 1.0, 1.0]], [[66.0, 2.0, 8.0]], [[62.0, 2.0, 0.0]], [[59.0, 2.0, 0.0]], [[62.0, 1.0, 1.0]], [[71.0, 6.5, 4.0]], [[66.0, 8.0, 0.0]], [[69.0, 6.0, 0.0]], [[66.0, 1.5, 8.0]], [[64.0, 1.5, 0.0]], [[64.0, 1.0, 
8.0]], [[66.0, 2.5, 0.0]], [[62.0, 3.0, 0.0]], [[64.0, 3.0, 0.0]], [[66.0, 2.5, 0.0]], [[66.0, 1.0, 8.0]], [[66.0, 2.5, 0.0]], [[66.0, 1.0, 0.0], [64.0, 1.5, 1.0]], [[64.0, 2.5, 0.0]], [[66.0, 2.5, 0.0]], [[64.0, 2.0, 0.0]], [[62.0, 3.0, 0.0]]]\n",
      "[['caught'], ['in'], ['trap'], ['I'], ['walk'], ['because', 'because'], ['I'], ['love'], ['you'], ['too'], ['much'], ['baby', 'baby'], ['Why'], ['see'], ['what'], ['me'], ['when'], ['you'], ['believe', 'believe'], ['a'], ['word'], ['I'], ['We'], ['go'], ['on'], ['together', 'together', 'together'], ['with'], ['suspicious', 'suspicious', 'suspicious'], ['minds'], ['And'], ['we'], ['build'], ['our'], ['dreams'], ['on'], ['suspicious', 'suspicious', 'suspicious'], ['minds'], ['So'], ['if'], ['an'], ['old'], ['friend'], ['I'], ['know'], ['stops'], ['by'], ['to'], ['say'], ['would'], ['I'], ['still'], ['see'], ['suspicion', 'suspicion', 'suspicion'], ['in'], ['your'], ['Here'], ['we'], ['go'], ['again', 'again'], ['been'], ['You'], ['see'], ['the'], ['tears'], ['are'], ['real'], ['We'], ['go'], ['on'], ['together', 'together', 'together'], ['with'], ['suspicious', 'suspicious', 'suspicious'], ['minds'], ['And'], ['we'], ['build'], ['our'], ['dreams'], ['on'], ['Oh'], ['tears'], ['your'], ['you'], ['I'], ['you'], ['a'], ['I'], ['I'], ['you'], ['I'], ['out'], ['because', 'because'], ['I'], ['love'], ['you'], ['too']]\n"
     ]
    }
   ],
   "source": [
    "for i in fnames:\n",
    "    word_data = np.load(i, allow_pickle=True)\n",
    "    print(word_data[0][1])\n",
    "    word_data = word_data[0][2]\n",
    "    print(word_data)\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "['caught',\n 'in',\n 'trap',\n 'I',\n 'walk',\n 'because',\n 'I',\n 'love',\n 'you',\n 'too',\n 'much',\n 'baby',\n 'Why',\n 'see',\n 'what',\n 'me',\n 'when',\n 'you',\n 'believe',\n 'a',\n 'word',\n 'I',\n 'We',\n 'go',\n 'on',\n 'together',\n 'with',\n 'suspicious',\n 'minds',\n 'And',\n 'we',\n 'build',\n 'our',\n 'dreams',\n 'on',\n 'suspicious',\n 'minds',\n 'So',\n 'if',\n 'an',\n 'old',\n 'friend',\n 'I',\n 'know',\n 'stops',\n 'by',\n 'to',\n 'say',\n 'would',\n 'I',\n 'still',\n 'see',\n 'suspicion',\n 'in',\n 'your',\n 'Here',\n 'we',\n 'go',\n 'again',\n 'been',\n 'You',\n 'see',\n 'the',\n 'tears',\n 'are',\n 'real',\n 'We',\n 'go',\n 'on',\n 'together',\n 'with',\n 'suspicious',\n 'minds',\n 'And',\n 'we',\n 'build',\n 'our',\n 'dreams',\n 'on',\n 'Oh',\n 'tears',\n 'your',\n 'you',\n 'I',\n 'you',\n 'a',\n 'I',\n 'I',\n 'you',\n 'I',\n 'out',\n 'because',\n 'I',\n 'love',\n 'you',\n 'too']"
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "[i[0] for i in word_data]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}